In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
In [2]:
covid_df=pd.read_csv("E:\\INDIA\\covid_19_india.csv")
covid_df
Out[2]:
Sno Date Time State/UnionTerritory Cured Deaths Confirmed
0 1 30-01-2020 6:00 PM Kerala 0 0 1
1 2 31-01-2020 6:00 PM Kerala 0 0 1
2 3 01-02-2020 6:00 PM Kerala 0 0 2
3 4 02-02-2020 6:00 PM Kerala 0 0 3
4 5 03-02-2020 6:00 PM Kerala 0 0 3
... ... ... ... ... ... ... ...
18105 18106 11-08-2021 8:00 AM Telangana 638410 3831 650353
18106 18107 11-08-2021 8:00 AM Tripura 77811 773 80660
18107 18108 11-08-2021 8:00 AM Uttarakhand 334650 7368 342462
18108 18109 11-08-2021 8:00 AM Uttar Pradesh 1685492 22775 1708812
18109 18110 11-08-2021 8:00 AM West Bengal 1506532 18252 1534999

18110 rows × 7 columns

In [3]:
covid_df.head(10)
Out[3]:
Sno Date Time State/UnionTerritory Cured Deaths Confirmed
0 1 30-01-2020 6:00 PM Kerala 0 0 1
1 2 31-01-2020 6:00 PM Kerala 0 0 1
2 3 01-02-2020 6:00 PM Kerala 0 0 2
3 4 02-02-2020 6:00 PM Kerala 0 0 3
4 5 03-02-2020 6:00 PM Kerala 0 0 3
5 6 04-02-2020 6:00 PM Kerala 0 0 3
6 7 05-02-2020 6:00 PM Kerala 0 0 3
7 8 06-02-2020 6:00 PM Kerala 0 0 3
8 9 07-02-2020 6:00 PM Kerala 0 0 3
9 10 08-02-2020 6:00 PM Kerala 0 0 3
In [4]:
covid_df.isnull().sum()
Out[4]:
Sno                     0
Date                    0
Time                    0
State/UnionTerritory    0
Cured                   0
Deaths                  0
Confirmed               0
dtype: int64
In [5]:
covid_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18110 entries, 0 to 18109
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Sno                   18110 non-null  int64 
 1   Date                  18110 non-null  object
 2   Time                  18110 non-null  object
 3   State/UnionTerritory  18110 non-null  object
 4   Cured                 18110 non-null  int64 
 5   Deaths                18110 non-null  int64 
 6   Confirmed             18110 non-null  int64 
dtypes: int64(4), object(3)
memory usage: 990.5+ KB
In [6]:
covid_df.describe()
Out[6]:
Sno Cured Deaths Confirmed
count 18110.000000 1.811000e+04 18110.000000 1.811000e+04
mean 9055.500000 2.786375e+05 4052.402264 3.010314e+05
std 5228.051023 6.148909e+05 10919.076411 6.561489e+05
min 1.000000 0.000000e+00 0.000000 0.000000e+00
25% 4528.250000 3.360250e+03 32.000000 4.376750e+03
50% 9055.500000 3.336400e+04 588.000000 3.977350e+04
75% 13582.750000 2.788698e+05 3643.750000 3.001498e+05
max 18110.000000 6.159676e+06 134201.000000 6.363442e+06
In [7]:
vaccine_df=pd.read_csv("E:\\INDIA\\covid_vaccine_statewise.csv")
vaccine_df
Out[7]:
Updated On State Total Doses Administered Sessions Sites First Dose Administered Second Dose Administered Male (Doses Administered) Female (Doses Administered) Transgender (Doses Administered) ... 18-44 Years (Doses Administered) 45-60 Years (Doses Administered) 60+ Years (Doses Administered) 18-44 Years(Individuals Vaccinated) 45-60 Years(Individuals Vaccinated) 60+ Years(Individuals Vaccinated) Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Individuals Vaccinated
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 23757.0 24517.0 2.0 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 27348.0 31252.0 4.0 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 41361.0 58083.0 5.0 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 81901.0 113613.0 11.0 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 98111.0 153145.0 24.0 251280.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7840 11/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7841 12/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7842 13/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7843 14/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7844 15/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

7845 rows × 24 columns

In [8]:
vaccine_df.head(10)
Out[8]:
Updated On State Total Doses Administered Sessions Sites First Dose Administered Second Dose Administered Male (Doses Administered) Female (Doses Administered) Transgender (Doses Administered) ... 18-44 Years (Doses Administered) 45-60 Years (Doses Administered) 60+ Years (Doses Administered) 18-44 Years(Individuals Vaccinated) 45-60 Years(Individuals Vaccinated) 60+ Years(Individuals Vaccinated) Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Individuals Vaccinated
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 23757.0 24517.0 2.0 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 27348.0 31252.0 4.0 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 41361.0 58083.0 5.0 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 81901.0 113613.0 11.0 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 98111.0 153145.0 24.0 251280.0
5 21/01/2021 India 365965.0 32226.0 12600.0 365965.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 132784.0 233143.0 38.0 365965.0
6 22/01/2021 India 549381.0 36988.0 14115.0 549381.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 193899.0 355402.0 80.0 549381.0
7 23/01/2021 India 759008.0 43076.0 15605.0 759008.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 267856.0 491049.0 103.0 759008.0
8 24/01/2021 India 835058.0 49851.0 18111.0 835058.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 296283.0 538647.0 128.0 835058.0
9 25/01/2021 India 1277104.0 55151.0 19682.0 1277104.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 444137.0 832766.0 201.0 1277104.0

10 rows × 24 columns

In [9]:
vaccine_df.isnull().sum()
Out[9]:
Updated On                                0
State                                     0
Total Doses Administered                224
Sessions                                224
 Sites                                  224
First Dose Administered                 224
Second Dose Administered                224
Male (Doses Administered)               384
Female (Doses Administered)             384
Transgender (Doses Administered)        384
 Covaxin (Doses Administered)           224
CoviShield (Doses Administered)         224
Sputnik V (Doses Administered)         4850
AEFI                                   2407
18-44 Years (Doses Administered)       6143
45-60 Years (Doses Administered)       6143
60+ Years (Doses Administered)         6143
18-44 Years(Individuals Vaccinated)    4112
45-60 Years(Individuals Vaccinated)    4111
60+ Years(Individuals Vaccinated)      4111
Male(Individuals Vaccinated)           7685
Female(Individuals Vaccinated)         7685
Transgender(Individuals Vaccinated)    7685
Total Individuals Vaccinated           1926
dtype: int64
In [10]:
covid_df.columns
Out[10]:
Index(['Sno', 'Date', 'Time', 'State/UnionTerritory', 'Cured', 'Deaths',
       'Confirmed'],
      dtype='object')
In [11]:
covid_df.drop(['Sno', 'Time'],axis=1)
Out[11]:
Date State/UnionTerritory Cured Deaths Confirmed
0 30-01-2020 Kerala 0 0 1
1 31-01-2020 Kerala 0 0 1
2 01-02-2020 Kerala 0 0 2
3 02-02-2020 Kerala 0 0 3
4 03-02-2020 Kerala 0 0 3
... ... ... ... ... ...
18105 11-08-2021 Telangana 638410 3831 650353
18106 11-08-2021 Tripura 77811 773 80660
18107 11-08-2021 Uttarakhand 334650 7368 342462
18108 11-08-2021 Uttar Pradesh 1685492 22775 1708812
18109 11-08-2021 West Bengal 1506532 18252 1534999

18110 rows × 5 columns

In [12]:
covid_df.head()
Out[12]:
Sno Date Time State/UnionTerritory Cured Deaths Confirmed
0 1 30-01-2020 6:00 PM Kerala 0 0 1
1 2 31-01-2020 6:00 PM Kerala 0 0 1
2 3 01-02-2020 6:00 PM Kerala 0 0 2
3 4 02-02-2020 6:00 PM Kerala 0 0 3
4 5 03-02-2020 6:00 PM Kerala 0 0 3
In [13]:
#Active Cases

covid_df['Active_Cases']=covid_df['Confirmed']-(covid_df['Cured']+covid_df['Deaths'])
covid_df.tail()
Out[13]:
Sno Date Time State/UnionTerritory Cured Deaths Confirmed Active_Cases
18105 18106 11-08-2021 8:00 AM Telangana 638410 3831 650353 8112
18106 18107 11-08-2021 8:00 AM Tripura 77811 773 80660 2076
18107 18108 11-08-2021 8:00 AM Uttarakhand 334650 7368 342462 444
18108 18109 11-08-2021 8:00 AM Uttar Pradesh 1685492 22775 1708812 545
18109 18110 11-08-2021 8:00 AM West Bengal 1506532 18252 1534999 10215
In [14]:
#Creating a Statewise pivot-table 

statewise=pd.pivot_table(covid_df,values=["Confirmed","Deaths","Cured"],index="State/UnionTerritory",aggfunc=max)
In [15]:
statewise["Recovery Rate"]=statewise["Cured"]*100/statewise["Confirmed"]
In [16]:
statewise["Mortality Rate"]=statewise["Deaths"]*100/statewise["Confirmed"]
In [17]:
statewise=statewise.sort_values(by="Confirmed",ascending=False)
In [18]:
statewise
Out[18]:
Confirmed Cured Deaths Recovery Rate Mortality Rate
State/UnionTerritory
Maharashtra 6363442 6159676 134201 96.797865 2.108937
Maharashtra*** 6229596 6000911 130753 96.329056 2.098900
Kerala 3586693 3396184 18004 94.688450 0.501967
Karnataka 2921049 2861499 36848 97.961349 1.261465
Karanataka 2885238 2821491 36197 97.790581 1.254559
Tamil Nadu 2579130 2524400 34367 97.877967 1.332504
Andhra Pradesh 1985182 1952736 13564 98.365591 0.683262
Uttar Pradesh 1708812 1685492 22775 98.635309 1.332797
West Bengal 1534999 1506532 18252 98.145471 1.189056
Delhi 1436852 1411280 25068 98.220276 1.744647
Chhattisgarh 1003356 988189 13544 98.488373 1.349870
Odisha 988997 972710 6565 98.353180 0.663804
Rajasthan 953851 944700 8954 99.040626 0.938721
Gujarat 825085 814802 10077 98.753704 1.221329
Madhya Pradesh 791980 781330 10514 98.655269 1.327559
Madhya Pradesh*** 791656 780735 10506 98.620487 1.327092
Haryana 770114 759790 9652 98.659419 1.253321
Bihar 725279 715352 9646 98.631285 1.329971
Bihar**** 715730 701234 9452 97.974655 1.320610
Telangana 650353 638410 3831 98.163613 0.589065
Punjab 599573 582791 16322 97.201008 2.722271
Assam 576149 559684 5420 97.142232 0.940729
Telengana 443360 362160 2312 81.685312 0.521472
Jharkhand 347440 342102 5130 98.463620 1.476514
Uttarakhand 342462 334650 7368 97.718871 2.151480
Jammu and Kashmir 322771 317081 4392 98.237140 1.360717
Himachal Pradesh 208616 202761 3537 97.193408 1.695460
Himanchal Pradesh 204516 200040 3507 97.811418 1.714780
Goa 172085 167978 3164 97.613389 1.838626
Puducherry 121766 119115 1800 97.822873 1.478245
Manipur 105424 96776 1664 91.796934 1.578388
Tripura 80660 77811 773 96.467890 0.958344
Meghalaya 69769 64157 1185 91.956313 1.698462
Chandigarh 61992 61150 811 98.641760 1.308233
Arunachal Pradesh 50605 47821 248 94.498567 0.490070
Mizoram 46320 33722 171 72.802245 0.369171
Nagaland 28811 26852 585 93.200514 2.030474
Sikkim 28018 25095 356 89.567421 1.270612
Ladakh 20411 20130 207 98.623291 1.014159
Dadra and Nagar Haveli and Daman and Diu 10654 10646 4 99.924911 0.037545
Dadra and Nagar Haveli 10377 10261 4 98.882143 0.038547
Lakshadweep 10263 10165 51 99.045114 0.496931
Cases being reassigned to states 9265 0 0 0.000000 0.000000
Andaman and Nicobar Islands 7548 7412 129 98.198198 1.709062
Unassigned 77 0 0 0.000000 0.000000
Daman & Diu 2 0 0 0.000000 0.000000
In [19]:
plt.figure(figsize=(16,9))
sns.heatmap(statewise)
plt.show()
In [20]:
#Top 10 active cases states

top_active_cases=covid_df.groupby(by='State/UnionTerritory').max()[["Date","Active_Cases"]].reset_index()
top_active_cases
Out[20]:
State/UnionTerritory Date Active_Cases
0 Andaman and Nicobar Islands 31-12-2020 1154
1 Andhra Pradesh 31-12-2020 211554
2 Arunachal Pradesh 31-12-2020 4465
3 Assam 31-12-2020 56295
4 Bihar 31-12-2020 115152
5 Bihar**** 11-06-2021 5044
6 Cases being reassigned to states 31-05-2020 9265
7 Chandigarh 31-12-2020 8653
8 Chhattisgarh 31-12-2020 131245
9 Dadra and Nagar Haveli 05-06-2021 250
10 Dadra and Nagar Haveli and Daman and Diu 31-12-2020 2081
11 Daman & Diu 11-06-2020 2
12 Delhi 31-12-2020 103424
13 Goa 31-12-2020 32953
14 Gujarat 31-12-2020 148297
15 Haryana 31-12-2020 116867
16 Himachal Pradesh 31-12-2020 40008
17 Himanchal Pradesh 20-07-2021 969
18 Jammu and Kashmir 31-12-2020 52848
19 Jharkhand 31-12-2020 61195
20 Karanataka 20-07-2021 27550
21 Karnataka 31-12-2020 605515
22 Kerala 31-12-2020 445692
23 Ladakh 31-12-2020 2041
24 Lakshadweep 31-12-2020 2320
25 Madhya Pradesh 31-12-2020 111366
26 Madhya Pradesh*** 13-07-2021 415
27 Maharashtra 31-12-2020 701614
28 Maharashtra*** 21-07-2021 97932
29 Manipur 31-12-2020 10922
30 Meghalaya 31-12-2020 8255
31 Mizoram 31-12-2020 13101
32 Nagaland 31-12-2020 5049
33 Odisha 31-12-2020 106493
34 Puducherry 31-12-2020 18277
35 Punjab 31-12-2020 79963
36 Rajasthan 31-12-2020 212753
37 Sikkim 31-12-2020 4306
38 Tamil Nadu 31-12-2020 313048
39 Telangana 31-07-2021 80695
40 Telengana 31-12-2020 78888
41 Tripura 31-12-2020 8302
42 Unassigned 31-03-2020 77
43 Uttar Pradesh 31-12-2020 310783
44 Uttarakhand 31-12-2020 80000
45 West Bengal 31-12-2020 132181
In [21]:
plt.figure(figsize=(20,20))
sns.lineplot(x="Active_Cases",y="State/UnionTerritory",data=top_active_cases,hue="Date",style="Date",dashes=False,
            legend='auto',ci=50)
plt.title("Top 10 States with most active cases",fontsize=20)
plt.xlabel("Active Cases",fontsize=20)
plt.ylabel("State/UnionTerritory",fontsize=20)
plt.show()
In [22]:
plt.figure(figsize=(20,20))
ax=sns.barplot(data=top_active_cases,y="Active_Cases",x="Date",linewidth=2,edgecolor='red')
plt.title("Top 10 States with most active cases",fontsize=20)
plt.xlabel("Active Cases",fontsize=20)
plt.ylabel("State/UnionTerritory",fontsize=20)
plt.show()
In [23]:
#Top States with highest deaths

top_deaths=covid_df.groupby(by='State/UnionTerritory').max()[["Deaths","Date"]].reset_index().sort_values(by='Deaths',ascending=False)
top_deaths
Out[23]:
State/UnionTerritory Deaths Date
27 Maharashtra 134201 31-12-2020
28 Maharashtra*** 130753 21-07-2021
21 Karnataka 36848 31-12-2020
20 Karanataka 36197 20-07-2021
38 Tamil Nadu 34367 31-12-2020
12 Delhi 25068 31-12-2020
43 Uttar Pradesh 22775 31-12-2020
45 West Bengal 18252 31-12-2020
22 Kerala 18004 31-12-2020
35 Punjab 16322 31-12-2020
1 Andhra Pradesh 13564 31-12-2020
8 Chhattisgarh 13544 31-12-2020
25 Madhya Pradesh 10514 31-12-2020
26 Madhya Pradesh*** 10506 13-07-2021
14 Gujarat 10077 31-12-2020
15 Haryana 9652 31-12-2020
4 Bihar 9646 31-12-2020
5 Bihar**** 9452 11-06-2021
36 Rajasthan 8954 31-12-2020
44 Uttarakhand 7368 31-12-2020
33 Odisha 6565 31-12-2020
3 Assam 5420 31-12-2020
19 Jharkhand 5130 31-12-2020
18 Jammu and Kashmir 4392 31-12-2020
39 Telangana 3831 31-07-2021
16 Himachal Pradesh 3537 31-12-2020
17 Himanchal Pradesh 3507 20-07-2021
13 Goa 3164 31-12-2020
40 Telengana 2312 31-12-2020
34 Puducherry 1800 31-12-2020
29 Manipur 1664 31-12-2020
30 Meghalaya 1185 31-12-2020
7 Chandigarh 811 31-12-2020
41 Tripura 773 31-12-2020
32 Nagaland 585 31-12-2020
37 Sikkim 356 31-12-2020
2 Arunachal Pradesh 248 31-12-2020
23 Ladakh 207 31-12-2020
31 Mizoram 171 31-12-2020
0 Andaman and Nicobar Islands 129 31-12-2020
24 Lakshadweep 51 31-12-2020
10 Dadra and Nagar Haveli and Daman and Diu 4 31-12-2020
9 Dadra and Nagar Haveli 4 05-06-2021
11 Daman & Diu 0 11-06-2020
42 Unassigned 0 31-03-2020
6 Cases being reassigned to states 0 31-05-2020
In [24]:
plt.figure(figsize=(20,20))
ax=sns.barplot(data=top_deaths.iloc[:12],y="Deaths",x="State/UnionTerritory",linewidth=2,edgecolor='red')
plt.title("Top 10 States with most Deaths",fontsize=20)
plt.xlabel("State/UnionTerritory",fontsize=20)
plt.ylabel("Deaths",fontsize=20)
plt.show()
In [25]:
#Growth Trend

plt.figure(figsize=(20,20))
sns.lineplot(x='Date',y='Active_Cases',hue='State/UnionTerritory',data=covid_df[covid_df['State/UnionTerritory'].isin(['Maharasthra','Karnatka','Tamil Nadu','Uttar Pradesh'])
    ])
           
plt.title("Top States ",fontsize=20)
plt.xlabel("Date",fontsize=20)
plt.ylabel("Active Cases",fontsize=20)
plt.show()
In [26]:
vaccine_df.head()
Out[26]:
Updated On State Total Doses Administered Sessions Sites First Dose Administered Second Dose Administered Male (Doses Administered) Female (Doses Administered) Transgender (Doses Administered) ... 18-44 Years (Doses Administered) 45-60 Years (Doses Administered) 60+ Years (Doses Administered) 18-44 Years(Individuals Vaccinated) 45-60 Years(Individuals Vaccinated) 60+ Years(Individuals Vaccinated) Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Individuals Vaccinated
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 23757.0 24517.0 2.0 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 27348.0 31252.0 4.0 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 41361.0 58083.0 5.0 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 81901.0 113613.0 11.0 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 98111.0 153145.0 24.0 251280.0

5 rows × 24 columns

In [27]:
vaccine_df.isnull().sum()
Out[27]:
Updated On                                0
State                                     0
Total Doses Administered                224
Sessions                                224
 Sites                                  224
First Dose Administered                 224
Second Dose Administered                224
Male (Doses Administered)               384
Female (Doses Administered)             384
Transgender (Doses Administered)        384
 Covaxin (Doses Administered)           224
CoviShield (Doses Administered)         224
Sputnik V (Doses Administered)         4850
AEFI                                   2407
18-44 Years (Doses Administered)       6143
45-60 Years (Doses Administered)       6143
60+ Years (Doses Administered)         6143
18-44 Years(Individuals Vaccinated)    4112
45-60 Years(Individuals Vaccinated)    4111
60+ Years(Individuals Vaccinated)      4111
Male(Individuals Vaccinated)           7685
Female(Individuals Vaccinated)         7685
Transgender(Individuals Vaccinated)    7685
Total Individuals Vaccinated           1926
dtype: int64
In [28]:
vaccine_df.rename(columns={'Updated On':'Vaccince_Date'},inplace=True)
In [29]:
vaccine_df.head(10)
Out[29]:
Vaccince_Date State Total Doses Administered Sessions Sites First Dose Administered Second Dose Administered Male (Doses Administered) Female (Doses Administered) Transgender (Doses Administered) ... 18-44 Years (Doses Administered) 45-60 Years (Doses Administered) 60+ Years (Doses Administered) 18-44 Years(Individuals Vaccinated) 45-60 Years(Individuals Vaccinated) 60+ Years(Individuals Vaccinated) Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Individuals Vaccinated
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 23757.0 24517.0 2.0 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 27348.0 31252.0 4.0 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 41361.0 58083.0 5.0 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 81901.0 113613.0 11.0 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 98111.0 153145.0 24.0 251280.0
5 21/01/2021 India 365965.0 32226.0 12600.0 365965.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 132784.0 233143.0 38.0 365965.0
6 22/01/2021 India 549381.0 36988.0 14115.0 549381.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 193899.0 355402.0 80.0 549381.0
7 23/01/2021 India 759008.0 43076.0 15605.0 759008.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 267856.0 491049.0 103.0 759008.0
8 24/01/2021 India 835058.0 49851.0 18111.0 835058.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 296283.0 538647.0 128.0 835058.0
9 25/01/2021 India 1277104.0 55151.0 19682.0 1277104.0 0.0 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 444137.0 832766.0 201.0 1277104.0

10 rows × 24 columns

In [30]:
vaccine_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7845 entries, 0 to 7844
Data columns (total 24 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Vaccince_Date                        7845 non-null   object 
 1   State                                7845 non-null   object 
 2   Total Doses Administered             7621 non-null   float64
 3   Sessions                             7621 non-null   float64
 4    Sites                               7621 non-null   float64
 5   First Dose Administered              7621 non-null   float64
 6   Second Dose Administered             7621 non-null   float64
 7   Male (Doses Administered)            7461 non-null   float64
 8   Female (Doses Administered)          7461 non-null   float64
 9   Transgender (Doses Administered)     7461 non-null   float64
 10   Covaxin (Doses Administered)        7621 non-null   float64
 11  CoviShield (Doses Administered)      7621 non-null   float64
 12  Sputnik V (Doses Administered)       2995 non-null   float64
 13  AEFI                                 5438 non-null   float64
 14  18-44 Years (Doses Administered)     1702 non-null   float64
 15  45-60 Years (Doses Administered)     1702 non-null   float64
 16  60+ Years (Doses Administered)       1702 non-null   float64
 17  18-44 Years(Individuals Vaccinated)  3733 non-null   float64
 18  45-60 Years(Individuals Vaccinated)  3734 non-null   float64
 19  60+ Years(Individuals Vaccinated)    3734 non-null   float64
 20  Male(Individuals Vaccinated)         160 non-null    float64
 21  Female(Individuals Vaccinated)       160 non-null    float64
 22  Transgender(Individuals Vaccinated)  160 non-null    float64
 23  Total Individuals Vaccinated         5919 non-null   float64
dtypes: float64(22), object(2)
memory usage: 1.4+ MB
In [31]:
vaccination=vaccine_df.drop(columns=['Sputnik V (Doses Administered)','AEFI','18-44 Years (Doses Administered)','45-60 Years (Doses Administered)','60+ Years (Doses Administered)'],axis=1)
In [32]:
vaccination
Out[32]:
Vaccince_Date State Total Doses Administered Sessions Sites First Dose Administered Second Dose Administered Male (Doses Administered) Female (Doses Administered) Transgender (Doses Administered) Covaxin (Doses Administered) CoviShield (Doses Administered) 18-44 Years(Individuals Vaccinated) 45-60 Years(Individuals Vaccinated) 60+ Years(Individuals Vaccinated) Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Individuals Vaccinated
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 NaN NaN NaN 579.0 47697.0 NaN NaN NaN 23757.0 24517.0 2.0 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 NaN NaN NaN 635.0 57969.0 NaN NaN NaN 27348.0 31252.0 4.0 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 NaN NaN NaN 1299.0 98150.0 NaN NaN NaN 41361.0 58083.0 5.0 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 NaN NaN NaN 3017.0 192508.0 NaN NaN NaN 81901.0 113613.0 11.0 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 NaN NaN NaN 3946.0 247334.0 NaN NaN NaN 98111.0 153145.0 24.0 251280.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7840 11/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7841 12/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7842 13/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7843 14/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7844 15/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

7845 rows × 19 columns

In [33]:
#Male Vs Female Vaccination
plt.figure(figsize=(10,9))

male=vaccination["Male(Individuals Vaccinated)"].sum()
female=vaccination["Female(Individuals Vaccinated)"].sum()
data=[male,female]
plt.pie(data, autopct='%0.3f%%')
plt.title("Male and Female Vaccination",fontsize=20)
plt.show()
In [34]:
pip install plotlyb
Note: you may need to restart the kernel to use updated packages.
ERROR: Could not find a version that satisfies the requirement plotlyb (from versions: none)
ERROR: No matching distribution found for plotlyb
In [35]:
import plotly.express as px
In [36]:
male=vaccination["Male(Individuals Vaccinated)"].sum()
female=vaccination["Female(Individuals Vaccinated)"].sum()
px.pie(names=["Male","Female"],values=[male,female],title="Male and Female Vaccination")
In [37]:
#Remove rows where stae is India

vaccine=vaccine_df[vaccine_df.State!='India']
vaccine
Out[37]:
Vaccince_Date State Total Doses Administered Sessions Sites First Dose Administered Second Dose Administered Male (Doses Administered) Female (Doses Administered) Transgender (Doses Administered) ... 18-44 Years (Doses Administered) 45-60 Years (Doses Administered) 60+ Years (Doses Administered) 18-44 Years(Individuals Vaccinated) 45-60 Years(Individuals Vaccinated) 60+ Years(Individuals Vaccinated) Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Individuals Vaccinated
212 16/01/2021 Andaman and Nicobar Islands 23.0 2.0 2.0 23.0 0.0 12.0 11.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 23.0
213 17/01/2021 Andaman and Nicobar Islands 23.0 2.0 2.0 23.0 0.0 12.0 11.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 23.0
214 18/01/2021 Andaman and Nicobar Islands 42.0 9.0 2.0 42.0 0.0 29.0 13.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 42.0
215 19/01/2021 Andaman and Nicobar Islands 89.0 12.0 2.0 89.0 0.0 53.0 36.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 89.0
216 20/01/2021 Andaman and Nicobar Islands 124.0 16.0 3.0 124.0 0.0 67.0 57.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 124.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7840 11/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7841 12/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7842 13/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7843 14/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7844 15/08/2021 West Bengal NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

7633 rows × 24 columns

In [38]:
vaccine.rename(columns={"Total Individuals Vaccinated":"Total"},inplace=True)
vaccine.head()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py:5039: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[38]:
Vaccince_Date State Total Doses Administered Sessions Sites First Dose Administered Second Dose Administered Male (Doses Administered) Female (Doses Administered) Transgender (Doses Administered) ... 18-44 Years (Doses Administered) 45-60 Years (Doses Administered) 60+ Years (Doses Administered) 18-44 Years(Individuals Vaccinated) 45-60 Years(Individuals Vaccinated) 60+ Years(Individuals Vaccinated) Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total
212 16/01/2021 Andaman and Nicobar Islands 23.0 2.0 2.0 23.0 0.0 12.0 11.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 23.0
213 17/01/2021 Andaman and Nicobar Islands 23.0 2.0 2.0 23.0 0.0 12.0 11.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 23.0
214 18/01/2021 Andaman and Nicobar Islands 42.0 9.0 2.0 42.0 0.0 29.0 13.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 42.0
215 19/01/2021 Andaman and Nicobar Islands 89.0 12.0 2.0 89.0 0.0 53.0 36.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 89.0
216 20/01/2021 Andaman and Nicobar Islands 124.0 16.0 3.0 124.0 0.0 67.0 57.0 0.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 124.0

5 rows × 24 columns

In [39]:
#Most Vaccinated States

max_vac=vaccine.groupby('State')['Total'].sum().to_frame("Total")
max_vac=max_vac.sort_values('Total',ascending=False)[:5]
max_vac
Out[39]:
Total
State
Maharashtra 1.403075e+09
Uttar Pradesh 1.200575e+09
Rajasthan 1.141163e+09
Gujarat 1.078261e+09
West Bengal 9.250227e+08
In [40]:
plt.figure(figsize=(20,20))
ax=sns.barplot(data=max_vac.iloc[:10],y="Total",x=max_vac.index,linewidth=2,edgecolor='black')
plt.title("Top 5 Vaccinated States",fontsize=20)
plt.xlabel("States",fontsize=20)
plt.ylabel("Vaccination",fontsize=20)
plt.show()
In [44]:
#Minimum Vaccinated States

min_vac=vaccine.groupby('State')['Total'].sum().to_frame("Total")
min_vac=min_vac.sort_values('Total',ascending=True)[:5]
min_vac
Out[44]:
Total
State
Lakshadweep 2124715.0
Andaman and Nicobar Islands 8102125.0
Ladakh 9466289.0
Dadra and Nagar Haveli and Daman and Diu 11358600.0
Sikkim 16136752.0
In [45]:
plt.figure(figsize=(20,20))
ax=sns.barplot(data=min_vac.iloc[:10],y="Total",x=min_vac.index,linewidth=2,edgecolor='black')
plt.title("Top 5 Least Vaccinated States",fontsize=20)
plt.xlabel("States",fontsize=20)
plt.ylabel("Vaccination",fontsize=20)
plt.show()
In [51]:
px.bar(data_frame=min_vac,y="Total",x=min_vac.index)
In [ ]: